#######################################################
## 					REPLICATION MATERIAL 
## 
## 			"Do Authoritarian Elections Help the Poor? 
##				Evidence from Russian Cities" 
## 
##					   	   by 				 			 	  
##			Quintin H. Beazer & Ora John Reuter
##
##						   in							 	  
## 				The Journal of Politics		 	  
##													 	  
#######################################################


# File created and checked May 2020 using R 3.6.3 (x64). To ask questions 
# or report a suspected error, please contact the authors. We are happy 
# to correspond with others about the project. Send emails to: 
# Quintin Beazer (qbeazer@fsu.edu) or John Reuter (reutero@uwm.edu) 


## TABLE OF CONTENTS
#####################

# This .R script contains code for reproducing the figures  
# presented within the article's main body and for Table A14, 
# the sensitivity analysis results.

# Replication code for the main analyses appears separately in the 
# accompanying .do file: BeazerReuterHousingTables.do.  
# This file will help reproduce the following:



## FIGURES
# Figure 1: Russian Household Survey: Respondents with 
#           Appointed Mayors are More Dissatisfied with Public Services

# Figure 2: Marginal Effects of Mayoral Appointments on Bad Housing Stock, 
#           Conditional on Delivery of United Russia Votes

## TABLE
# Table A14: Sensitivity Analysis: Estimates of 
#            Appointments on Bad Housing, 
#            Depending on Assumptions about Unobserved Confounders



#=============================================
#   PREAMBLE                              ####
#=============================================
graphics.off() # CLOSE ALL OPEN GRAPHICS WINDOWS
rm(list = ls())   # REMOVE ALL OBJECTS

setwd("XXXXX")  # replace with your directory path


## load libraries ##

# packages <- c("tidyverse","lfe","ggplot2","gridExtra")
# install.packages(packages)

library(foreign)
library(lfe)
library(tidyverse)
library(ggplot2)
library(gridExtra)


##################
#### FIGURE 1 ####
##################

# load summary of RLMS items

# requires first running file: BeazerReuterRLMS2009dotplot.do 
rdata <- read.dta("RLMS2009publicgoods.dta") 
glimpse(rdata)

# reordering columns for desired order in plot
rdata <- rdata %>% 
  select(city, appointed, satbldg, everything())

## FUNCTION to get means and confidence intervals
means.fun <- function(x){
  a1 <- rdata %>% 
    select(var = x, app = appointed) %>% 
    split(.$app) %>% 
    map(~broom::tidy(t.test(.$var))) %>% 
    bind_rows(.,.id = "app")  %>% 
    select(app, estimate, conf.low, conf.high) %>% 
    mutate(item = x)
    return(a1)
  }

# running list of items through means/interval functions
varlist <- colnames(rdata)[-(1:2)]

dotsdata <- varlist %>% 
  map(means.fun) %>% 
  bind_rows() %>% 
  rowid_to_column()

# making new variables for plotting purposes
dotsdata <- dotsdata %>% 
  mutate(raion = ifelse(rowid > 10,
                        "Concerns about Neighborhood",
                        "Apartment Building Problems"),
         appcity = ifelse(app==1,"Appointed","Elected"))

# creating labels for item names in plot
item_names <- c(
  'satbldg' = "Needs Major Repair",
  'satwater' = "Irregular Water Supply",
  'satgas' = "Irregular Gas Supply",
  'satwaste' = "Irregular Waste Disposal",
  'satsec' = "Lack of Security",
  'satstreets' = "Dirty Streets",
  'satparks' = "Lack of Green Spaces",
  'satbugs2' = "Pest Control",
  'satair' = "Air Pollution",
  'satwater2' = "Water Contamination"
)


## setting the basic plot
rdots <- ggplot(dotsdata, aes(x = reorder(item, -rowid), 
                       y = estimate, 
                       group = appcity, color = appcity)) +
  geom_pointrange(aes(ymin=conf.low,
                      ymax=conf.high),
                  size = 1.25) +
  scale_colour_manual(values = c("grey30","grey60"), 
                      name="", 
                      labels=c("Appointed\nMayor", "Elected\nMayor")) +
  coord_flip() +
  theme_minimal() 


## adjusting plot labels and axes
fig1 <- rdots + facet_wrap(~raion, ncol=1 ,scales = "free_y") +
  labs(y = "Proportion of Surveyed Households", x = "") +
  theme(#panel.grid.minor.x = element_blank(),
    plot.title = element_text(lineheight=3,face="bold", 
                              color="black", size=24,
                              hjust = 0.5),
    axis.text = element_text(size=16),
    axis.title = element_text(face="bold",size=16),
    axis.title.x = element_text(margin = margin(20,0,0,0) ),
    legend.text = element_text(size = 14, face = "bold"),
    legend.position = "bottom",
    legend.justification=c(-.75,0.1),
    strip.text.x = element_text(size = 20, 
                                face = "bold",
                                hjust=0,
                                margin=margin(10,0,10,0)),
    panel.spacing.y = unit(2, "lines"),
    plot.background = element_rect(fill = "transparent", color = NA),
    panel.background = element_rect(fill = "transparent", color = NA)) +
    scale_x_discrete(labels = item_names)  

## viewing plot
windows(height = 12, width=9)
fig1

## saving plot
ggsave(filename = "fg1.png",
       plot = fig1,
       units = "in",
       dpi = 600,
       bg = "transparent",
       height = 12, width=9)

# ## high res output
# tiff(filename = "fg1.tif",
#        units = "in",
#        res = 600,
#        bg = "transparent",
#        height = 12, width=9)
# fig1
# dev.off()







##################
#### FIGURE 2 ####
##################

## load housing dataset
hdata <- read.dta("zhilJOP.dta")

## adjust vars for use in R
hdata$pressfreedom <- as.numeric(hdata$pressfreedom)

hdata <- hdata %>%
  group_by(city_id) %>%
  mutate(fappointed = lead(appointed, order_by=city_id),
         lappointed = lag(appointed, order_by=city_id),
         lURduma = lag(URduma, order_by=city_id),
         lURhigh = lag(URhigh, order_by=city_id))


#### HELPER FUNCTIONS FOR PLOTTING INTERACTION ####

## function to extract estimates from lfe model

me.params <- function(mod,x,z){
  beta1 <<-  mod$coefficients[x,] 
  beta2 <<- mod$coefficients[z,] 
  xz <- paste(x, z, sep = ":")
  beta3 <<-mod$coefficients[xz,] 
  var1 <<- vcov(mod)[x, x]
  var2 <<- vcov(mod)[z, z]
  var3 <<- vcov(mod)[xz, xz]
  cov13 <<- vcov(mod)[x, xz]
  cov23 <<- vcov(mod)[z, xz] 
}
#note: writes to global environment



## to turn estimates into dataframe of plot values

me.x <- function(z) {
  me <- matrix(rep(NA), nrow = length(z), ncol = 3)
  me[,1] <- beta1 + beta3*z
  me[,2] <- beta1 + beta3*z - 1.96*sqrt(var1 + (z^2)*var3 + 2*z*cov13)
  me[,3] <- beta1 + beta3*z + 1.96*sqrt(var1 + (z^2)*var3 + 2*z*cov13)
  me <- as.data.frame(me)
  print(me)
} 

me.z <- function(x) {
  me <- matrix(rep(NA), nrow = length(x), ncol = 3)
  me[,1] <- beta2 + beta3*x
  me[,2] <- beta2 + beta3*x - 1.96*sqrt(var2 + (x^2)*var3 + 2*x*cov23)
  me[,3] <- beta2 + beta3*x + 1.96*sqrt(var2 + (x^2)*var3 + 2*x*cov23)
  me <- as.data.frame(me)
  print(me)
} 



##########################################################
#### FIG 2, LEFT PANEL: TABLE 4, MODEL 1 (CONTINUOUS) ####
##########################################################

## NOTE:
# felm standard errors do not match Stata xtreg estimates exactly due to different df adjustment in cluster SE in R
# In general, the reported SE estimates in the paper from Stata are larger (but trivially so).
# Differences are very slight (in the second or third decimal place).
# Values from R models used here for convenience, but Stata estimates provided below for optional use.

# ## NUMERICAL ESTIMATES FROM STATA (for optional use)
# beta1 <- -21.78344
# beta2 <- -.3389785
# beta3 <- .8169756
# var1 <- 199.6188
# var2 <- .11311337
# var3 <- .12929409
# cov13 <- -4.2366498
# cov23 <-  -.04116414



## model 1
dumaXfe.mod <- felm(badzhil ~ appointed*lURduma + zhiltot +
                      pressfreedom + workpop + AvgSalary1 + 
                      regdem + birthrate + regprom + pop |
                      year + city_id | 0 | city_id, 
                    data=hdata,keepX = T)
summary(dumaXfe.mod) 


## calling function to extract model estimates to global environment
me.params(dumaXfe.mod,"appointed","lURduma")


# taking observed values X and Z for plotting ranges
xvals <- c(0,1)  # appointed (dummy)
zvals <- seq(20,100,1) # Duma vote share for UR
zvalsdv <- c(0,1) # High/low Duma vote share for UR (dummy)


## calling function to create data frames for ggplot
meApp <- me.x(zvals) %>% 
  mutate(xvalues = zvals) %>% 
  rename(marg = V1,
         lower = V2,
         upper = V3)

meDuma <- me.z(xvals) %>% 
  mutate(xvalues = xvals,  ## use next line for high/low option
         xvalues2 = ifelse(xvalues==0,"elected", "appointed")) %>% 
  rename(marg = V1,
         lower = V2,
         upper = V3)



## marginal effects of APPOINTMENTS 

## plotting
appPlot <- ggplot(data = meApp, 
                  aes(x = xvalues, y = marg)) + 
  geom_line(size = 1.5) +
  geom_ribbon(aes(ymin = lower, ymax = upper), 
              alpha = 0.5) 

## to add rug plot, need dataframe from model + variable with same name as marginal value var

pdata <- dumaXfe.mod$X %>% 
  as.data.frame() %>% 
  mutate(marg = 1)

## with rug plot
appPlot <- appPlot +  
  geom_rug(data = pdata, 
           aes(x = lURduma),
           sides = "b") +
  geom_hline(yintercept = 0, linetype = 2) +
  theme_minimal()

## adjusting labels and axes
appPlot <- appPlot +
  theme(#panel.grid.major.y = element_blank(),
    panel.grid.minor.y = element_blank(),
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank(),
    axis.text = element_text(size = 14, face = "bold"),
    axis.title = element_text(size = 16, face = "bold"),
    axis.title.y = element_text(margin=margin(r=20, l=10)),
    axis.title.x = element_text(margin=margin(t=15, b=5)),
    plot.background = element_rect(fill = "transparent", color = NA),
    panel.background = element_rect(fill = "transparent", color = NA)
  ) + 
  labs( y = "Marginal Effect of Appointment on\n Bad Housing (1000s sq. m)",
        x = "United Russia Vote Share in Recent Duma Election")

## inspecting plot
windows(height = 5, width = 7)
appPlot


## saving plots in PNG
# ggsave(filename = "MEapp.png",
#        plot = appPlot, 
#        units = "in",
#        bg = "transparent"
#        height = 5, width = 7)





#######################################################
#### FIG 2, RIGHT PANEL: TABLE 4, MODEL 2 (BINARY) ####
#######################################################

## UR DUMA HIGH/NOT HIGH ####

# ## ESTIMATES FROM STATA (for optional use)
# beta1 <- 10.17442
# beta2 <- -10.76862
# beta3 <- 40.42763
# var1 <- 58.001783
# var2 <- 71.95193 
# var3 <- 264.84419
# cov13 <- 49.65552
# cov23 <-  -73.763983


## model 2
dumaXfedv.mod <- felm(badzhil ~ appointed*lURhigh + zhiltot +
                        pressfreedom + workpop + AvgSalary1 +
                        regdem + birthrate + regprom + pop |
                        year + city_id | 0 | city_id,
                      data = hdata, keepX = T)
summary(dumaXfedv.mod)


## calling function to extract model estimates to global environment
me.params(dumaXfedv.mod,"appointed","lURhigh")

## taking observed values X and Z for plotting ranges
xvals <- c(0,1)  # appointed (dummy)
zvals <- seq(20,100,1) # Duma vote share for UR
zvalsdv <- c(0,1) # High/low Duma vote share for UR (dummy)


## data frames for ggplot
meAppdv <- me.x(zvalsdv) %>% 
  mutate(xvalues = zvalsdv,
         xvalues2 = ifelse(xvalues==0,"No", "Yes")) %>% 
  rename(marg = V1,
         lower = V2,
         upper = V3)

meDumadv <- me.z(xvals) %>% 
  mutate(xvalues = xvals,  ## use next line for high/low option
         xvalues2 = ifelse(xvalues==0,"elected", "appointed")) %>% 
  rename(marg = V1,
         lower = V2,
         upper = V3)

## plotting
appdvPlot <- ggplot(data = meAppdv, 
                    aes(x = xvalues2, y = marg,
                        group = xvalues2,
                        color = xvalues2)) + 
  geom_pointrange(aes(ymin=lower, ymax=upper),
                  size = 2, show.legend = F,
                  #col = "#0277BB" ) +
                  col = "black" ) +
  geom_hline(yintercept = 0, linetype = 2) +
  theme_minimal() 


## adjusting labels and axes
appdvPlot <- appdvPlot +
  theme(#panel.grid.major.y = element_blank(),
    panel.grid.minor.y = element_blank(),
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank(),
    axis.text = element_text(size = 14, face = "bold"),
    axis.title = element_text(size = 16, face = "bold"),
    axis.title.y = element_text(margin=margin(r=20, l=10)),
    axis.title.x = element_text(margin=margin(t=15, b=5)),
    plot.background = element_rect(fill = "transparent", color = NA),
    panel.background = element_rect(fill = "transparent", color = NA)
  ) + 
  labs( y = "Marginal Effect of Appointment on\n Bad Housing (1000s sq. m)",
        x = "City in Top Quartile for United Russia Votes?")


## inspecting plot
windows(height = 5, width = 6)
appdvPlot


## saving plot in PNG
# ggsave(filename = "MEquartileBW.png",
#        plot = appdvPlot, 
#        units = "in",
#        bg ="transparent",
#        height = 5, width = 6)



## combining using gridExtra package

windows(height=5,width=14)
grid.arrange(appPlot, appdvPlot, ncol = 2, nrow = 1,
             widths=c(7, 7))

fig2 <- grid.arrange(appPlot, appdvPlot, ncol = 2, nrow = 1,
                            widths=c(7, 7))

## saving plot in PNG
ggsave(filename = "fg2.png",
       plot = fig2,
       dpi = 600,
       bg ="transparent",
       units = "in",
       height=5,width=14)





##################################################
### TABLE A14: Functions for sensitivity analysis 
##################################################

## Sensitivity analysis for linear model from VanderWeele (2011)

s.lm <- function(the.coef) 
{
  gamma=c(.5,1,5,10,30,50)  #effect of latent factor on Y
  delta=c(.1,.3,.5)
  bias.mat <- delta %*% t(gamma)
  colnames(bias.mat) <- gamma
  row.names(bias.mat) <- delta
  
  
  if(the.coef>=0){
    ###SENSITIVITY
    corrected <-the.coef-bias.mat
  }
  else{
    ###SENSITIVITY
    corrected<-the.coef+bias.mat
  }
  return(corrected)
}


## to get estimates, plug in value of coefficient

# example: beta coefficient for "appointment" from Table 1, model 1 
s.lm(20.1)








